In [2]:
import graphlab as gl
SFrame is a tabular, column-mutable dataframe object that can scale to big data. The data in an SFrame is stored column-wise on persistent storage (e.g. disk), so it is not constrained by memory size. Each column in an SFrame is a size-immutable SArray, but the SFrame itself is mutable in that columns can be added and removed with ease. An SFrame essentially acts as an ordered dict of SArrays.
In [3]:
# Load the example CSV into an SFrame; the path is relative to the notebook's working directory.
sf = gl.SFrame('data/people-example.csv')
In [4]:
sf # a bare `sf` as the cell's last expression displays the first few rows of the table
Out[4]:
In [5]:
# Preview the first few rows of the table.
sf.head()
Out[5]:
In [6]:
# Preview the last few rows of the table.
sf.tail()
Out[6]:
In [8]:
# Select a single column by name; the column comes back as an SArray.
sf['Country']
Out[8]:
In [9]:
# Average of the 'age' column.
sf['age'].mean()
Out[9]:
In [10]:
# Show the table as it stands before a derived column is added.
sf
Out[10]:
In [11]:
# Add a new 'Full Name' column: first and last name joined by a single space.
sf['Full Name'] = sf['First Name'] + ' ' + sf['Last Name']
In [12]:
# Re-display the table to confirm the 'Full Name' column is present.
sf
Out[12]:
In [13]:
# Inspect the raw 'Country' values before they are normalized.
sf['Country']
Out[13]:
In [14]:
def transform_country(country):
    """Return a display-friendly country name.

    Expands the abbreviation 'USA' to 'United States'; every other value is
    returned unchanged.

    Args:
        country: Country value to normalize.

    Returns:
        'United States' when ``country == 'USA'``, otherwise ``country`` itself.
    """
    # Only the exact string 'USA' is rewritten; there is no case folding or
    # whitespace trimming, so values such as 'usa' pass through untouched.
    return 'United States' if country == 'USA' else country
In [15]:
# Sanity check: the abbreviation should expand to 'United States'.
transform_country('USA')
Out[15]:
In [16]:
# Sanity check: any value other than 'USA' should come back unchanged.
transform_country('India')
Out[16]:
In [17]:
# Apply transform_country to the 'Country' column and overwrite the column with the result.
sf['Country'] = sf['Country'].apply(transform_country)
In [18]:
# Re-display the table to confirm the 'Country' values were rewritten.
sf
Out[18]: